######################################################/
## Anticipated Election Result and Protest Voting    ##
## -- Replication Material (Data Preparation Part 1) ##
## by: Christian Schimpf                             ##
## Version: July 01, 2019                            ##
## R-Version: 3.4.0.                                 ##
######################################################/

####
# NOTE: In order to run the file properly, please specify working
# directories and any file paths accordingly. To identify paths
# that need to be changed, search for "#ACHTUNG#" using
# Ctrl+F (STRG+F on German keyboards).


# Clear the working space.
# NOTE: this removes every object from the global environment, so run the
# script in a fresh R session if the workspace holds anything unsaved.
rm(list = ls())

# Set working directory
#ACHTUNG# (Change path to folder in which the three Making Electoral Democracy Work Datasets are located)
setwd("<FOLDER>")


# Load necessary packages: install any package that is missing, then attach
# every package in the list.

packages <- c("ggplot2", "foreign", "stargazer", "car",
              "dplyr", "Zelig", "ZeligChoice", "nnet",
              "reshape2", "effects",
              "readstata13", "interplot", "margins",
              "cowplot", "gridExtra", "broom", "rstanarm")

# Query the installed packages once instead of once per loop iteration.
installed_pkgs <- rownames(installed.packages())

for (p in packages) {
  if (!(p %in% installed_pkgs)) {
    install.packages(p)
  }
  # library() stops with an error when a package cannot be attached, whereas
  # require() only returns FALSE with a warning and lets the script continue.
  library(p, character.only = TRUE)
}


########################################################################/
##### /// Part 1 - Prep MEDW Data                  ###################### 
########################################################################/

# Because the aggregated MEDW dataset did not include all the variables 
# used, I relied on the datasets that were published separately for the 
# samples of each of the three provinces collected for the national election.
# Below, I will load the dataset and generate a subset containing only the
# variables that are used later on. I will repeat the process for each
# of the three datasets and then merge them to one dataset.
# Note further that throughout the scripts, I make references to Party 1, Party 2 etc.
# The corresponding parties are:
# Party 1 = Conservative Party
# Party 2 = New Democratic Party
# Party 3 = Liberal Party 
# Party 4 = Bloc Quebecois
# Party 5 = Green Party

#### >>> Part 1A Load and Prep Dataset for Quebec ####

# Load the Quebec sample. convert.factors = FALSE tells read.dta() not to
# turn Stata value labels into factors, so the stored codes are kept.
dfQuebec <- read.dta("Quebec_federal_2015.dta", convert.factors = FALSE)

# Generate variables with names comparable to aggregated dataset:

# Party Ratings:
dfQuebec$Q17_party1 <- dfQuebec$Q17A
dfQuebec$Q17_party2 <- dfQuebec$Q17B
dfQuebec$Q17_party3 <- dfQuebec$Q17C
dfQuebec$Q17_party4 <- dfQuebec$Q17D
dfQuebec$Q17_party5 <- dfQuebec$Q17E

# Win chances attributed by respondent to each of the parties to win
# seat in riding:
dfQuebec$Q28_party1 <- dfQuebec$Q28_1
dfQuebec$Q28_party2 <- dfQuebec$Q28_2
dfQuebec$Q28_party3 <- dfQuebec$Q28_3
dfQuebec$Q28_party4 <- dfQuebec$Q28_4
dfQuebec$Q28_party5 <- dfQuebec$Q28_5

# Ideological placement of parties:
dfQuebec$Q30_party1 <- dfQuebec$Q30A2_1
dfQuebec$Q30_party2 <- dfQuebec$Q30A2_2
dfQuebec$Q30_party3 <- dfQuebec$Q30A2_3
dfQuebec$Q30_party4 <- dfQuebec$Q30A2_4
dfQuebec$Q30_party5 <- dfQuebec$Q30A2_5

# Ideological self-placement:
dfQuebec$Q30_1 <- dfQuebec$Q30A

# Election ID for Province of Quebec:
dfQuebec$ELECID <- 1

# Are views reflected in parliament:
dfQuebec$Q49 <- dfQuebec$Q49A

# Party respondent feels close to:
dfQuebec$Q47_1 <- dfQuebec$Q47A

# Satisfaction with federal government performance (national level).
# Any Q13A column already present in the raw file is overwritten by Q14.
dfQuebec$Q13A <- dfQuebec$Q14

# Generate dummy for postsecondary education (0 = no postsecondary education,
# 1 = postsecondary education) -> coded based on attainment (SD4 in 9, 10, 11).
# A missing SD4 stays missing. The code-to-label mapping is pinned through
# levels/labels so the labels cannot shift if one category is absent.
# NOTE(review): the comment speaks of postsecondary education while the labels
# say "University Education"; confirm SD4 codes 9-11 against the codebook.
dfQuebec$POSTSECONDARY <- factor(
  ifelse(dfQuebec$SD4 == 9 |
           dfQuebec$SD4 == 10 |
           dfQuebec$SD4 == 11, 1, 0),
  levels = c(0, 1),
  labels = c("No University Education", "University Education")
)

# Economic evaluations of Canada federal level (retrospective):
dfQuebec$Q15A <- dfQuebec$Q16

# Importance of national election in general; code 99 is recoded to NA
# (presumably a "don't know"/refusal code -- confirm against the codebook):
dfQuebec$ElectionImportance <- dfQuebec$Q34_1
dfQuebec$ElectionImportance[dfQuebec$ElectionImportance == 99] <- NA

# Generate list with variables to be kept:

myvarsQuebec <- c("respid", "ELECID", "Q1B_BEGINNING", "Q1B_END", 
                  "Q2", "Q3", "Q3B", "Q5", "Q13A", "Q15A",
                  "Q19_1", "Q19_2", "Q19_3", "Q19_4", "Q19_5",
                  "Q17_party1", "Q17_party2", "Q17_party3", "Q17_party4", "Q17_party5",
                  "Q28_party1", "Q28_party2", "Q28_party3", "Q28_party4", "Q28_party5",
                  "Q30_party1", "Q30_party2", "Q30_party3", "Q30_party4", "Q30_party5",
                  "Q29", "Q30_1", "Q47_1", "Q49", "POSTSECONDARY", "PQ6", "Q18", "PQ5_1",
                  "PQ5_2", "gend", "age", "Q27", "Q27A", "Q26",
                  "Q20", "Q35_4", "Q11", "SD9", "POST_WEIGHT1", "POST_WEIGHT2",
                  "POST_WEIGHT3", "POST_WEIGHT3B", "POST_WEIGHT4", "ElectionImportance")

# Generate subsetted dataset for Quebec (columns in the order listed above):
dfQuebecMEDW <- dfQuebec[myvarsQuebec]


#### >>> Part 1B Load and Prep Dataset for British Columbia ####

# Load the British Columbia sample. convert.factors = FALSE tells read.dta()
# not to turn Stata value labels into factors, so the stored codes are kept.
dfBritishC <- read.dta("British_Columbia_federal_2015.dta", convert.factors = FALSE)

# Generate variables with names comparable to aggregated dataset.
# All Party 4 (Bloc Quebecois) items are created as missing so that the
# columns line up with the Quebec dataset when the samples are appended.

# Party Ratings:
dfBritishC$Q17_party1 <- dfBritishC$Q17A
dfBritishC$Q17_party2 <- dfBritishC$Q17B
dfBritishC$Q17_party3 <- dfBritishC$Q17C
dfBritishC$Q17_party4 <- NA
dfBritishC$Q17_party5 <- dfBritishC$Q17E

# Win chances attributed by respondent to each of the parties to win
# seat in riding:
dfBritishC$Q28_party1 <- dfBritishC$Q28_1
dfBritishC$Q28_party2 <- dfBritishC$Q28_2
dfBritishC$Q28_party3 <- dfBritishC$Q28_3
dfBritishC$Q28_party4 <- NA
dfBritishC$Q28_party5 <- dfBritishC$Q28_5

# Ideological placement of parties:
dfBritishC$Q30_party1 <- dfBritishC$Q30A2_1
dfBritishC$Q30_party2 <- dfBritishC$Q30A2_2
dfBritishC$Q30_party3 <- dfBritishC$Q30A2_3
dfBritishC$Q30_party4 <- NA
dfBritishC$Q30_party5 <- dfBritishC$Q30A2_5

# Leader Ratings: Because the Bloc Quebecois leader was not rated by respondents
# from British Columbia, the variable is not included in the BC Federal Election
# MEDW dataset. To ease the merge later on, the variable is generated and set to
# missing here:
dfBritishC$Q19_4 <- NA

# Ideological self-placement:
dfBritishC$Q30_1 <- dfBritishC$Q30A

# Election ID for Province of British Columbia:
dfBritishC$ELECID <- 2

# Are views reflected in parliament:
dfBritishC$Q49 <- dfBritishC$Q49A

# Party respondent feels close to:
dfBritishC$Q47_1 <- dfBritishC$Q47A

# Satisfaction with federal government performance (national level).
# Any Q13A column already present in the raw file is overwritten by Q14.
dfBritishC$Q13A <- dfBritishC$Q14

# Generate dummy for postsecondary education (0 = no postsecondary education,
# 1 = postsecondary education) -> coded based on attainment (SD4 in 9, 10, 11).
# A missing SD4 stays missing. The code-to-label mapping is pinned through
# levels/labels so the labels cannot shift if one category is absent.
# NOTE(review): the comment speaks of postsecondary education while the labels
# say "University Education"; confirm SD4 codes 9-11 against the codebook.
dfBritishC$POSTSECONDARY <- factor(
  ifelse(dfBritishC$SD4 == 9 |
           dfBritishC$SD4 == 10 |
           dfBritishC$SD4 == 11, 1, 0),
  levels = c(0, 1),
  labels = c("No University Education", "University Education")
)

# Economic evaluations of Canada federal level (retrospective):
dfBritishC$Q15A <- dfBritishC$Q16

# Importance of national election in general; code 99 is recoded to NA
# (presumably a "don't know"/refusal code -- confirm against the codebook):
dfBritishC$ElectionImportance <- dfBritishC$Q34_1
dfBritishC$ElectionImportance[dfBritishC$ElectionImportance == 99] <- NA


# Generate list with variables to be kept:

myvarsBritishC <- c("respid", "ELECID", "Q1B_BEGINNING", "Q1B_END", 
                    "Q2", "Q3", "Q3B", "Q5", "Q13A", "Q15A",
                    "Q19_1", "Q19_2", "Q19_3", "Q19_4", "Q19_5",
                    "Q17_party1", "Q17_party2", "Q17_party3", "Q17_party4", "Q17_party5",
                    "Q28_party1", "Q28_party2", "Q28_party3", "Q28_party4", "Q28_party5",
                    "Q30_party1", "Q30_party2", "Q30_party3", "Q30_party4", "Q30_party5",
                    "Q29", "Q30_1", "Q47_1", "Q49", "POSTSECONDARY", "PQ6", "Q18", "PQ5_1",
                    "PQ5_2", "gend", "age", "Q27", "Q27A", "Q26",
                    "Q20", "Q35_4", "Q11", "SD9", "POST_WEIGHT1", "POST_WEIGHT2",
                    "POST_WEIGHT3", "POST_WEIGHT3B", "POST_WEIGHT4", "ElectionImportance")
# Generate subsetted dataset for British Columbia (columns in the order listed above):
dfBritishCMEDW <- dfBritishC[myvarsBritishC]


#### >>> Part 1C - Load and Prep Dataset for Ontario ####

# Load the Ontario sample. convert.factors = FALSE tells read.dta() not to
# turn Stata value labels into factors, so the stored codes are kept.
dfOntario <- read.dta("Ontario_federal_2015.dta", convert.factors = FALSE)

# Generate variables with names comparable to aggregated dataset.
# All Party 4 (Bloc Quebecois) items are created as missing so that the
# columns line up with the Quebec dataset when the samples are appended.

# Party Ratings:
dfOntario$Q17_party1 <- dfOntario$Q17A
dfOntario$Q17_party2 <- dfOntario$Q17B
dfOntario$Q17_party3 <- dfOntario$Q17C
dfOntario$Q17_party4 <- NA
dfOntario$Q17_party5 <- dfOntario$Q17E

# Win chances attributed by respondent to each of the parties to win
# seat in riding:
dfOntario$Q28_party1 <- dfOntario$Q28_1
dfOntario$Q28_party2 <- dfOntario$Q28_2
dfOntario$Q28_party3 <- dfOntario$Q28_3
dfOntario$Q28_party4 <- NA
dfOntario$Q28_party5 <- dfOntario$Q28_5

# Ideological placement of parties:
dfOntario$Q30_party1 <- dfOntario$Q30A2_1
dfOntario$Q30_party2 <- dfOntario$Q30A2_2
dfOntario$Q30_party3 <- dfOntario$Q30A2_3
dfOntario$Q30_party4 <- NA
dfOntario$Q30_party5 <- dfOntario$Q30A2_5

# Leader Ratings: the Bloc Quebecois leader rating (Q19_4) is generated and set
# to missing for Ontario so that the variable exists for the merge later on.
# NOTE(review): presumably Q19_4 is absent from the Ontario file because the
# Bloc Quebecois leader was not rated by Ontario respondents -- confirm
# against the codebook.
dfOntario$Q19_4 <- NA

# Ideological self-placement:
dfOntario$Q30_1 <- dfOntario$Q30A

# Election ID for Province of Ontario:
dfOntario$ELECID <- 3

# Are views reflected in parliament:
dfOntario$Q49 <- dfOntario$Q49A

# Party respondent feels close to:
dfOntario$Q47_1 <- dfOntario$Q47A

# Satisfaction with federal government performance (national level).
# Any Q13A column already present in the raw file is overwritten by Q14.
dfOntario$Q13A <- dfOntario$Q14

# Generate dummy for postsecondary education (0 = no postsecondary education,
# 1 = postsecondary education) -> coded based on attainment (SD4 in 9, 10, 11).
# A missing SD4 stays missing. The code-to-label mapping is pinned through
# levels/labels so the labels cannot shift if one category is absent.
# NOTE(review): the comment speaks of postsecondary education while the labels
# say "University Education"; confirm SD4 codes 9-11 against the codebook.
dfOntario$POSTSECONDARY <- factor(
  ifelse(dfOntario$SD4 == 9 |
           dfOntario$SD4 == 10 |
           dfOntario$SD4 == 11, 1, 0),
  levels = c(0, 1),
  labels = c("No University Education", "University Education")
)

# Economic evaluations of Canada federal level (retrospective):
dfOntario$Q15A <- dfOntario$Q16

# Importance of national election in general; code 99 is recoded to NA
# (presumably a "don't know"/refusal code -- confirm against the codebook):
dfOntario$ElectionImportance <- dfOntario$Q34_1
dfOntario$ElectionImportance[dfOntario$ElectionImportance == 99] <- NA


# Generate list with variables to be kept:

myvarsOntario <- c("respid", "ELECID", "Q1B_BEGINNING", "Q1B_END", 
                   "Q2", "Q3", "Q3B", "Q5", "Q13A", "Q15A",
                   "Q19_1", "Q19_2", "Q19_3", "Q19_4", "Q19_5",
                   "Q17_party1", "Q17_party2", "Q17_party3", "Q17_party4", "Q17_party5",
                   "Q28_party1", "Q28_party2", "Q28_party3", "Q28_party4", "Q28_party5",
                   "Q30_party1", "Q30_party2", "Q30_party3", "Q30_party4", "Q30_party5",
                   "Q29", "Q30_1", "Q47_1", "Q49", "POSTSECONDARY", "PQ6", "Q18", "PQ5_1",
                   "PQ5_2", "gend", "age", "Q27", "Q27A", "Q26",
                   "Q20", "Q35_4", "Q11", "SD9", "POST_WEIGHT1", "POST_WEIGHT2",
                   "POST_WEIGHT3", "POST_WEIGHT3B", "POST_WEIGHT4", "ElectionImportance")

# Generate subsetted dataset for Ontario (columns in the order listed above):
dfOntarioMEDW <- dfOntario[myvarsOntario]


#### >>> 1D - Append the three single datasets ####

# Stack the provincial subsets row-wise in the order Quebec, British Columbia,
# Ontario.
dfCan2015 <- do.call(
  rbind,
  list(dfQuebecMEDW, dfBritishCMEDW, dfOntarioMEDW)
)

#### >>> 1E - save the dataset in R-Format ####
#ACHTUNG#
# The file is written relative to the current working directory.
save(list = "dfCan2015", file = "dfCan2015.RData")


#### >>> END OF R-Script ####